package resolver.analyzer

import java.util.regex.Pattern

/**
 * Analyzes the given text and tries to find meaningful word pairs.
 */
class SimpleTextAnalyzer implements Analyzer {

  /**
   * Pattern used to recognise candidate chemical names; it is compiled
   * case-insensitively. Examples of what it matches can be found in the test case.
   *
   * It has two alternatives:
   *  - the word "vitamin", up to two further word characters, one whitespace
   *    character and a single word character (bounded by word boundaries)
   *  - a run of at least three word characters, optionally preceded and followed
   *    by characters such as parentheses, square brackets, commas, hyphens and
   *    word characters, and optionally followed by one whitespace character plus
   *    one of: acid, anhydride, benzoate, ketone, ether, ester, cyanide
   */
  def pattern = Pattern.compile(/(\bvitamin\w{0,2}\s\w\b)|([\(\w?[,\w]+\)*\-]*[\(\w+\)\-]*[[\w]?[,\w]+\-]*(\[\w+\])*\w{3,}+[[\-\w]+]*((\sacid)|(\sanhydride)|(\sbenzoate)|(\sketone)|(\sether)|(\sester)|(\scyanide))?)/,Pattern.CASE_INSENSITIVE)

  /**
   * Scans the text for every occurrence of {@link #pattern} and collects the
   * distinct, trimmed matches.
   *
   * @param text the text to analyze; may be null
   * @return a new mutable set holding each non-empty match once; empty when the
   *         text is null or nothing matches
   */
  Set<String> analyze(String text) {

    // collects the distinct matches
    Set<String> found = new HashSet<String>()

    // Guard explicitly: the find operator coerces a non-String left operand to
    // text, so a null would otherwise be scanned as if it were a word.
    if (text == null) {
      return found
    }

    def matcher = (text =~ pattern)

    // group() is the complete match, independent of how many capture groups
    // the pattern declares
    while (matcher.find()) {

      // trim the match to avoid surrounding whitespace
      String candidate = matcher.group().trim()

      // only keep non-empty matches
      if (candidate.size() > 0) {
        found.add(candidate)
      }
    }

    return found
  }

  /**
   * Placeholder with an empty body: it performs no work and yields null.
   * NOTE(review): looks like an unfinished hook for building the pattern - confirm
   * whether it is still needed.
   */
  def buildPattern(){

  }

}
